In [2]:
import pandas as pd

# Geographic Information System Visualization
import folium
from folium import plugins
import webbrowser
import geopandas as gp
import vincent, json
import altair as alt
import reverse_geocoder as rg
In [3]:
# Power Plants

power_plants = pd.read_csv('downloads/US_Dataset_PowerPlants_Locations_Nature_County.csv')

power_plants.head()
Out[3]:
country country_long name gppd_idnr capacity_mw latitude longitude primary_fuel commissioning_year owner source url geolocation_source generation_gwh_2013 generation_gwh_2014 generation_gwh_2015 generation_gwh_2016 generation_gwh_2017
0 USA United States of America 12 Applegate Solar LLC USA0059371 1.9 40.2003 -74.5761 Solar 2012.0 SunRay Power LLC U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration 2.41461 2.350 2.430 2.492 2.276
1 USA United States of America 126 Grove Solar LLC USA0060858 2.0 42.0761 -71.4227 Solar 2012.0 126 Grove Solar LLC U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN NaN NaN 2.416 2.251
2 USA United States of America 1420 Coil Av #C USA0057310 1.3 33.7943 -118.2414 Solar 2011.0 Konoike Pacific U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration 1.52800 2.149 1.515 1.592 1.660
3 USA United States of America 145 Talmadge Solar USA0057458 3.8 40.5358 -74.3913 Solar 2011.0 Avidan Energy Solutions U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration 5.03600 4.524 4.802 5.051 4.819
4 USA United States of America 1515 S Caron Road USA0007770 4.2 41.9084 -89.0466 Gas 2000.0 Rochelle Municipal Utilities U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration 0.21500 0.178 0.271 0.306 0.264
In [4]:
power_plants.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8686 entries, 0 to 8685
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   country              8686 non-null   object 
 1   country_long         8686 non-null   object 
 2   name                 8686 non-null   object 
 3   gppd_idnr            8686 non-null   object 
 4   capacity_mw          8686 non-null   float64
 5   latitude             8686 non-null   float64
 6   longitude            8686 non-null   float64
 7   primary_fuel         8686 non-null   object 
 8   commissioning_year   8665 non-null   float64
 9   owner                8673 non-null   object 
 10  source               8686 non-null   object 
 11  url                  8686 non-null   object 
 12  geolocation_source   8686 non-null   object 
 13  generation_gwh_2013  6343 non-null   float64
 14  generation_gwh_2014  6761 non-null   float64
 15  generation_gwh_2015  7212 non-null   float64
 16  generation_gwh_2016  7944 non-null   float64
 17  generation_gwh_2017  8535 non-null   float64
dtypes: float64(9), object(9)
memory usage: 1.2+ MB
In [4]:
# https://github.com/wri/global-power-plant-database
# https://www.kaggle.com/eshaan90/global-power-plant-database#README.txt

Null_rows = power_plants[power_plants.isnull().any(axis=1)]
display(Null_rows)

# through searching information online, I can find that
# the NaN values in the columns (generation_gwh_2013/2014/2015/2016/2017) means
# electricity generation in gigawatt-hours reported for the year is zero
# so let's fill it by 0 and check null values again

power_plants.loc[:,power_plants.columns.isin(['generation_gwh_2013','generation_gwh_2014','generation_gwh_2015','generation_gwh_2016','generation_gwh_2017'])] = \
power_plants.loc[:,power_plants.columns.isin(['generation_gwh_2013','generation_gwh_2014','generation_gwh_2015','generation_gwh_2016','generation_gwh_2017'])]\
                   .fillna(0).reset_index(drop=True)
print('\n')
print('---------------------------------------------------------------')
print('\n')

Null_rows_2 = power_plants[power_plants.isnull().any(axis=1)]
display(Null_rows_2)
print(Null_rows_2.shape)

# so there is still some null values
# and I will drop these rows

power_plants = power_plants.dropna(how='any',axis=0)

print('\n')
print('---------------------------------------------------------------')
print('\n')

display(power_plants.isnull().sum())
country country_long name gppd_idnr capacity_mw latitude longitude primary_fuel commissioning_year owner source url geolocation_source generation_gwh_2013 generation_gwh_2014 generation_gwh_2015 generation_gwh_2016 generation_gwh_2017
1 USA United States of America 126 Grove Solar LLC USA0060858 2.0 42.0761 -71.4227 Solar 2012.0 126 Grove Solar LLC U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN NaN NaN 2.416 2.251
5 USA United States of America 158th Fighter Wing Solar Farm USA0060542 1.3 44.4777 -73.1534 Solar 2011.0 158th Fighter Wing U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN NaN NaN 1.655 1.616
8 USA United States of America 201 Sturbridge B USA0059822 2.0 42.1091 -72.1712 Solar 2014.0 SE Solarne2 LLC U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN 2.969 2.979 2.835 2.722
9 USA United States of America 205 Sturbridge A USA0059821 3.0 42.1093 -72.1705 Solar 2014.0 SH Solarne2 LLC U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN 2.969 5.326 5.319 5.041
10 USA United States of America 2081 Terzian Solar Project USA0058918 1.2 36.6700 -119.4158 Solar 2014.0 Solarenewal LLC U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN 1.918 3.444 2.601 3.016
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8673 USA United States of America ZV Solar 2 LLC USA0061257 4.9 34.8160 -79.2000 Solar 2017.0 CD Global Solar Holdings LLC U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN NaN NaN NaN 4.165
8674 USA United States of America ZV Solar 3 LLC USA0060549 5.0 34.8162 -79.2002 Solar 2016.0 SRE Utility Solar 1 LLC U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN NaN NaN 8.204 10.379
8678 USA United States of America Zephyr Wind USA0060128 4.5 41.0961 -83.6414 Wind 2016.0 OEE XIX LLC U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN NaN 0.000 10.791 11.470
8679 USA United States of America Zero Waste Energy Development Co LLC USA0060220 1.6 37.4325 -121.9528 Biomass 2015.0 Zero Waste Energy Development Company LLC U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN NaN 5.814 5.989 6.957
8680 USA United States of America Zimmerman Energy USA0059425 6.0 41.2378 -86.2461 Waste 2014.0 Landfill Energy Systems U.S. Energy Information Administration http://www.eia.gov/electricity/data/browser/ U.S. Energy Information Administration NaN 3.545 49.212 46.399 45.898

2442 rows × 18 columns


---------------------------------------------------------------


country country_long name gppd_idnr capacity_mw latitude longitude primary_fuel commissioning_year owner source url geolocation_source generation_gwh_2013 generation_gwh_2014 generation_gwh_2015 generation_gwh_2016 generation_gwh_2017
36 USA United States of America A.E.S. Corp. WRI1026808 454.3 17.9474 -66.1494 Coal NaN AES Corporation Fluor; PREPA http://www.fluor.com/projects/solid-fueled-pow... WRI 0.0 0.0 0.0 0.0 0.0
55 USA United States of America AES Ilumina WRI1026807 24.0 17.9468 -66.1591 Solar NaN AES Corporation PV Tech http://www.pv-tech.org/news/aes_solar_closes_f... WRI 0.0 0.0 0.0 0.0 0.0
121 USA United States of America Aguirre WRI1028012 42.0 17.9533 -66.2312 Gas NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
1195 USA United States of America Cambalache WRI1026810 247.5 18.4691 -66.6968 Oil NaN PREPA PREPA http://www.prepa.com/AEEES2_ENG.ASP WRI 0.0 0.0 0.0 0.0 0.0
1244 USA United States of America Caonillas 1 WRI1028015 18.0 18.2961 -66.6432 Hydro NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
1245 USA United States of America Caonillas 2 WRI1028016 3.6 18.2397 -66.6691 Hydro NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
1756 USA United States of America Costa Sur WRI1028011 42.0 18.0018 -66.7537 Gas NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
1916 USA United States of America Daguao WRI1028013 42.0 18.2325 -65.6677 Gas NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
2298 USA United States of America EcoEléctrica WRI1026812 507.0 17.9812 -66.7552 Gas NaN Gas Natural Fenosa International Power CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
3876 USA United States of America Jobos WRI1028010 42.0 17.9615 -66.1402 Gas NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
4712 USA United States of America Mayagüez WRI1028007 220.0 18.2194 -67.1600 Gas NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
5752 USA United States of America Palo Seco WRI1028008 126.0 18.4551 -66.1498 Gas NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
6574 USA United States of America Río Blanco WRI1026831 5.0 18.2436 -65.7852 Hydro NaN PREPA PREPA http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
6679 USA United States of America Salinas WRI1026820 15.5 17.9786 -66.2205 Solar NaN Sonnedix Sonnedix http://www.sonnedix.com/news/sonnedix-and-yaro... WRI 0.0 0.0 0.0 0.0 0.0
6716 USA United States of America San Fermin Solar Farm WRI1028017 27.0 18.4055 -65.9077 Solar NaN NaN TSK http://www.grupotsk.com/proyectos/puerto-rico WRI 0.0 0.0 0.0 0.0 0.0
7999 USA United States of America Vega Baja WRI1028009 42.0 18.4458 -66.3914 Gas NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
8044 USA United States of America Vieques EPP WRI1028006 6.0 18.1429 -65.4440 Oil NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
8538 USA United States of America Windmar Ponce WRI1026819 4.5 18.0623 -66.5485 Solar NaN Windmar Renewable Energy Windmar Renewable Energy http://www.windmarpv.com/#filter=* WRI 0.0 0.0 0.0 0.0 0.0
8628 USA United States of America Yabucoa WRI1028014 42.0 18.1064 -65.8249 Gas NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
8647 USA United States of America Yauco 1 WRI1026828 25.0 18.1106 -66.8711 Hydro NaN NaN CEPR http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
8648 USA United States of America Yauco 2 WRI1026829 9.0 18.0536 -66.8837 Hydro NaN PREPA PREPA http://energia.pr.gov/datos/plantas/ WRI 0.0 0.0 0.0 0.0 0.0
(21, 18)


---------------------------------------------------------------


country                0
country_long           0
name                   0
gppd_idnr              0
capacity_mw            0
latitude               0
longitude              0
primary_fuel           0
commissioning_year     0
owner                  0
source                 0
url                    0
geolocation_source     0
generation_gwh_2013    0
generation_gwh_2014    0
generation_gwh_2015    0
generation_gwh_2016    0
generation_gwh_2017    0
dtype: int64
In [5]:
# drop the columns we do not need

power_plants = power_plants.drop(['country','country_long','source','url','geolocation_source'],axis=1)
power_plants['commisioning_year'] = [int(i) for i in power_plants['commissioning_year']]
power_plants.head()
Out[5]:
name gppd_idnr capacity_mw latitude longitude primary_fuel commissioning_year owner generation_gwh_2013 generation_gwh_2014 generation_gwh_2015 generation_gwh_2016 generation_gwh_2017 commisioning_year
0 12 Applegate Solar LLC USA0059371 1.9 40.2003 -74.5761 Solar 2012.0 SunRay Power LLC 2.41461 2.350 2.430 2.492 2.276 2012
1 126 Grove Solar LLC USA0060858 2.0 42.0761 -71.4227 Solar 2012.0 126 Grove Solar LLC 0.00000 0.000 0.000 2.416 2.251 2012
2 1420 Coil Av #C USA0057310 1.3 33.7943 -118.2414 Solar 2011.0 Konoike Pacific 1.52800 2.149 1.515 1.592 1.660 2011
3 145 Talmadge Solar USA0057458 3.8 40.5358 -74.3913 Solar 2011.0 Avidan Energy Solutions 5.03600 4.524 4.802 5.051 4.819 2011
4 1515 S Caron Road USA0007770 4.2 41.9084 -89.0466 Gas 2000.0 Rochelle Municipal Utilities 0.21500 0.178 0.271 0.306 0.264 2000
In [6]:
colordict = {'Solar': 'green', 'Gas': 'darkpurple', 'Hydro': 'darkgreen', 'Wind': 'blue',
             'Coal':'black','Biomass':'gray','Waste':'cadetblue','Cogeneration':'orange',
             'Storage':'beige','Geothermal':'white','Nuclear':'darkred','Petcoke':'lightgray',
             'Other':'pink','Oil':'lightred'}

# each color means one fuel type
In [7]:
# draw the plot for every power plants with information
# after clicking, you can get Name, Commision Year and Primary Fuel.

latitude = 37.0902
longitude = -95.7129
schools_map = folium.Map(location=[latitude, longitude], zoom_start=5)
marker_cluster = plugins.MarkerCluster().add_to(schools_map)
for name,row in power_plants.iterrows():
     folium.Marker([row["latitude"], row["longitude"]], 
                   popup = ('<h5>' + 'Name: ' + '</h5>' + row['name'] + '<br>' 
                            '<h5>' + 'Commision Year: ' + '</h5>'+ str(row[6]) + '<br>'
                            '<h5>' + 'Primary Fuel: ' + '</h5>' + str(row[5])),
                   icon = folium.Icon(color = colordict[row[5]],
                   icon_color = 'yellow',icon='cloud')).add_to(marker_cluster)     

display(schools_map)
In [8]:
# draw the each power plants' generation from 2013 to 2017 with bar plot
# if after clicking the bar plot does not show up, you can click '...' to download it.

# because if I show all power plants' bar plots, my computer will crash
# therefore, I just select 40% data samples.

latitude = 37.0902
longitude = -95.7129
schools_map = folium.Map(location=[latitude, longitude], zoom_start=5)
marker_cluster = plugins.MarkerCluster().add_to(schools_map)

power_plants = power_plants.sample(frac=0.4)

for name,row in power_plants.iterrows():
    source = {
    "title": str(row[0]), 
    "data": {
    "values": [
      {"value": row[8],"variables": '2013'},
      {"value": row[9],"variables": "2014"},
      {"value": row[10],"variables": "2015"},
      {"value": row[11],"variables": "2016"},
      {"value": row[12],"variables": "2017"
      }
    ]
  },
  "encoding": {
    "color": {
      "field": "value",
      "scale": {"domain": [1,row[12]],"scheme": "yelloworangered"},
      "type": "quantitative"
    },
    "x": {"field": "variables","type": "nominal"},
    "y": {
      "field": "value",
      "type": "quantitative",
      "scale": {"domain": [0,row[12]]}
    }
  },
  "mark": "bar"
}
    vega = folium.features.VegaLite(source)
    popup = folium.Popup(parse_html=True)
    vega.add_to(popup)
    folium.Marker([row["latitude"], row["longitude"]],popup).add_to(marker_cluster)
    
display(schools_map)
In [9]:
# transform latitude and longitude to state, county

geo = []

for lat,lon in zip(power_plants['latitude'],power_plants['longitude']):
    geo.append((lat,lon))

def trans_lat_lon(geolocation):
    coordinates = geolocation
    results = rg.search(coordinates)
    return results

geo_dict = trans_lat_lon([i for i in geo])

geo_state = [list(geo_dict[i].values())[3] for i in range(len(geo_dict))]
geo_county = [list(geo_dict[i].values())[4] for i in range(len(geo_dict))]

power_plants['State'] = geo_state
power_plants['Counties'] = geo_county

power_plants.head()
Loading formatted geocoded file...
Out[9]:
name gppd_idnr capacity_mw latitude longitude primary_fuel commissioning_year owner generation_gwh_2013 generation_gwh_2014 generation_gwh_2015 generation_gwh_2016 generation_gwh_2017 commisioning_year State Counties
2411 Enoree Phase II Landfill Gas Recovery USA0056951 3.2 34.8008 -82.1847 Waste 2008.00000 Enerdyne Power Systems Inc 17.765 15.35856 15.481 14.366 13.986 2008 South Carolina Greenville County
1682 Conejos 1 Community Solar Array USA0060723 1.5 37.1959 -105.9824 Solar 2017.00000 Clean Focus Renewables Inc. 0.000 0.00000 0.000 0.000 0.132 2017 Colorado Conejos County
5581 Optimum Wind 4 LLC USA0059226 3.0 42.0164 -93.5158 Wind 2017.00000 Optimum Wind 4 LLC 0.000 0.00000 0.000 0.000 1.137 2017 Iowa Story County
2214 E J West USA0006527 22.0 43.3189 -73.9208 Hydro 1930.00000 Erie Boulevard Hydropower LP 62.996 67.44000 47.062 45.853 77.890 1930 New York Saratoga County
970 Browns Ferry USA0000046 3494.0 34.7042 -87.1189 Nuclear 1975.35146 Tennessee Valley Authority 26718.113 26738.30000 27669.694 26214.623 27847.879 1975 Alabama Morgan County
In [10]:
power_plants.to_csv('power_plants_cleaned.csv',index=False)